欢迎关注“小丫画图”公众号,回复“小白”,看小视频,实现点鼠标跑代码。
小丫微信: epigenomics E-mail: figureya@126.com
作者:胡二强
小丫编辑校验
enrichr的优点是批量获得各个数据库里的富集结果。如果用clusterprofiler做富集分析,也能一次性把各数据库的结果输出一个报告,就会很方便。
输入一个基因列表(gene symbol和foldchange)和msigdb里的所有gmt文件,用clusterprofiler做富集分析,输出表格和barplot。
富集分析用哪个数据库做注释?
读paper找答案。有的文章用GO,有的用KEGG,有的用hallmark,有的从文献中收集基因集。
哪个适合我?挨个尝试太麻烦,批量全都跑出来。
用上调/下调基因分别做富集分析(ORA)?还是用GSEA跑全部基因?
两种方法 X 所有数据库,都跑出来看看。
哪个容易讲故事,最后就选哪个方法、哪个数据库。然后继续做深入研究。后续展示方式可参考https://k.koudai.com/sm2p0xYn
使用国内镜像安装包
options("repos"= c(CRAN="https://mirrors.tuna.tsinghua.edu.cn/CRAN/"))
options(BioC_mirror="http://mirrors.tuna.tsinghua.edu.cn/bioconductor/")
install.packages("msigdbr")
install.packages("shadowtext")
install.packages("ggstance")
加载包
library(clusterProfiler)
##
## clusterProfiler v3.16.1 For help: https://guangchuangyu.github.io/software/clusterProfiler
##
## If you use clusterProfiler in published research, please cite:
## Guangchuang Yu, Li-Gen Wang, Yanyan Han, Qing-Yu He. clusterProfiler: an R package for comparing biological themes among gene clusters. OMICS: A Journal of Integrative Biology. 2012, 16(5):284-287.
##
## Attaching package: 'clusterProfiler'
## The following object is masked from 'package:stats':
##
## filter
library(msigdbr)
library(DOSE)
## DOSE v3.14.0 For help: https://guangchuangyu.github.io/software/DOSE
##
## If you use DOSE in published research, please cite:
## Guangchuang Yu, Li-Gen Wang, Guang-Rong Yan, Qing-Yu He. DOSE: an R/Bioconductor package for Disease Ontology Semantic and Enrichment analysis. Bioinformatics 2015, 31(4):608-609
library(enrichplot)
library(ggplot2)
library(plyr)
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:clusterProfiler':
##
## arrange, mutate, rename, summarise
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Sys.setenv(LANGUAGE = "en") #显示英文报错信息
options(stringsAsFactors = FALSE) #禁止chr转成factor
自定义函数
## 数据处理函数
merge_result2 <- function(enrichResultList, output = "compareClusterResult") {
if ( !is(enrichResultList, "list")) {
stop("input should be a name list...")
}
if ( is.null(names(enrichResultList))) {
stop("input should be a name list...")
}
x <- lapply(enrichResultList, as.data.frame)
names(x) <- names(enrichResultList)
y <- ldply(x, "rbind")
if (output == "compareClusterResult") {
y <- plyr::rename(y, c(.id="Cluster"))
y$Cluster = factor(y$Cluster, levels=names(enrichResultList))
return(new("compareClusterResult",
compareClusterResult = y))
}
y <- plyr::rename(y, c(.id="Category"))
if (output == "enrichResult") {
return(new("enrichResult",
result = y))
}
if (output == "gseaResult") {
return(new("gseaResult",
result = y))
}
}
keep_category <- function(em_ORA, n) {
table_em <- as.numeric(table(em_ORA$Category))
start <- rep(0, length(table_em) - 1)
for(i in seq_len(length(table_em) - 1)) {
start[i] <- sum(table_em[seq_len(i)])
}
showCategorys <- sapply(table_em, function(x) min(n, x))
start <- c(0, start) + 1
end <- start + showCategorys - 1
keep <- NULL
for(i in seq_len(length(start))) {
keep <- c(keep, c(start[i] : end[i]))
}
return(keep)
}
enrich_filter <- function(em_result, showCategory) {
keep <- keep_category(em_result, showCategory)
em_result <- em_result[keep, ]
if ("NES" %in% colnames(em_result))
em_result$Count <- em_result$core_enrichment %>%
strsplit(split = "/") %>%
vapply(length, FUN.VALUE = 1)
return(em_result)
}
## 作图函数
em_plot <- function(em_1 = NULL, em_2 = NULL, showCategory = 2, fill = "p.adjust", hjust = 1) {
fill <- match.arg(fill, c("Category", "p.adjust", "log10_p.adjust"))
result1 <- enrich_filter(em_1, showCategory)
if (is.null(em_2)) {
result <- result1
} else {
result2 <- enrich_filter(em_2, showCategory)
result2$Count <- -result2$Count
result <- rbind(result1, result2)
}
result$Category <- gsub("\n.*", "", result$Category)
result$log10_p.adjust <- log10(result$p.adjust)
data_plot <- result[, c("ID", "Category", "p.adjust", "log10_p.adjust", "Count")]
data_plot2 <- data_plot
data_plot2$ID <- factor(data_plot2$ID, levels = unique(data_plot2$ID))
data_plot2 <- plyr::rename(data_plot2, c("Count" = "gene_number"))
h_just <- ifelse(data_plot2$gene_number < 0, -hjust, hjust)
ggplot(data_plot2, aes_string(x = "gene_number", y = "ID", fill = fill)) +
geom_col() +
geom_text(aes_(x =~ gene_number + h_just, label =~ abs(gene_number)),
color="black") +
scale_x_continuous(label = abs,
expand = expansion(mult = c(.01, .01))) + #两侧留空
theme_classic() +
ylab("") +
theme(axis.title.x = element_text(size = 15)) +
facet_grid(Category ~ ., scales="free", space="free")
}
三种方法,根据自己的需要任选其一,或组合使用:
最简单常用
# 查看msigdbr所支持的物种
# 这里没有我要的物种怎么办?看方法三
msigdbr_species()
## # A tibble: 11 x 2
## species_name species_common_name
## <chr> <chr>
## 1 Bos taurus cattle
## 2 Caenorhabditis elegans roundworm
## 3 Canis lupus familiaris dog
## 4 Danio rerio zebrafish
## 5 Drosophila melanogaster fruit fly
## 6 Gallus gallus chicken
## 7 Homo sapiens human
## 8 Mus musculus house mouse
## 9 Rattus norvegicus Norway rat
## 10 Saccharomyces cerevisiae baker's or brewer's yeast
## 11 Sus scrofa pig
# 以人为例
gmt <- msigdbr(species = "Homo sapiens")
gmt2 <- gmt%>%
dplyr::select(gs_name, entrez_gene)
gmts <- split(gmt2, gmt$gs_cat)
可以从MSigDB下载自己想要分析的注释对应的gmt文件,保存到当前文件夹。
做单细胞的小伙伴,可以用FigureYa194pySCENIC生成的regulon的gmt文件,进一步在更多数据集里深入挖掘,例如TCGA数据。
# 读取当前文件夹内的所有gmt文件
files = list.files(pattern="*.gmt")
gmts <- setNames(lapply(files, read.gmt), gsub(".v7.2.entrez.gmt", "", files))
如果你研究的领域比较新或小众,那么现有的注释文件可能不能满足你的分析需求。这时往往会自己总结一个基因集:
我们首先来看一下注释文件gmts的结构:
## 类型为list
class(gmts)
## [1] "list"
## list中的每一项都是一个数据框,数据框有两列,第一列是term名称,第二列是基因。
head(gmts[[1]])
## # A tibble: 6 x 2
## gs_name entrez_gene
## <chr> <int>
## 1 chr10p11 26983
## 2 chr10p11 399746
## 3 chr10p11 100288319
## 4 chr10p11 100420618
## 5 chr10p11 91074
## 6 chr10p11 94134
可以看到,注释数据为一个list,list中的每一项都是一个数据框,代表来源于一个category的注释数据。每个数据框包含两列,第一列为term(如通路/功能/感兴趣的基因集),第二列为gene id(或gene symbol等)。
当我们从某篇文献中看到一个感兴趣的基因集,希望将它一起放入到已有的注释数据中一起做富集时,可以进行如下操作:
(1)将这些感兴趣的基因集处理成annotation.txt所示的格式,此文件有两列,第一列是用户自定义的term name,第二列是此term中对应的基因。
(2)得到所研究物种的背景基因,它可以是该物种的所有基因,也可以是表达谱中所有具有表达值的基因,如background.txt。
(3)将新的注释信息与方法一或二的注释合并:
anno <- read.table("annotation.txt", sep="\t", header = T)
background <- read.table("background.txt", sep="\t", header = F)
background <- setdiff(background[,1], anno$entrez_gene)
bg_anno <- data.frame(gs_name = rep("background", length(background)), entrez_gene = background)
anno_data <- rbind(anno, bg_anno)
gmts$my_anno <- as_tibble(anno_data)
easy_input_limma.csv,差异表达分析结果。是FigureYa59Plus_GEO2DEG的输出文件,可无缝对接。
需要两列:基因名和Foldchange(或其他有意义的连续变量,例如ChIP/ATAC-seq的peak分数)。
# 加载差异表达分析结果
gsym.fc <- read.csv("easy_input_limma.csv")
colnames(gsym.fc)[1] <- "SYMBOL"
gsym.fc[1:3,]
## SYMBOL logFC AveExpr t P.Value adj.P.Val B
## 1 KLK10 8.778049 10.50675 111.5755 3.802229e-11 1.760315e-07 15.41764
## 2 FXYD3 7.748971 10.45958 107.2722 4.809044e-11 1.760315e-07 15.29791
## 3 KLK7 9.138924 10.59638 102.6583 6.252983e-11 1.760315e-07 15.15779
#把gene symbol转换为ENTREZ ID
#此处物种是人,其他物种的ID转换方法,请参考FigureYa52GOplot
gsym.id <- bitr(gsym.fc$SYMBOL, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = "org.Hs.eg.db")
## Loading required package: org.Hs.eg.db
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
##
## clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
## clusterExport, clusterMap, parApply, parCapply, parLapply,
## parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, append, as.data.frame, basename, cbind, colnames,
## dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
## grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
## order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
## rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
## union, unique, unsplit, which, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:plyr':
##
## rename
## The following object is masked from 'package:clusterProfiler':
##
## rename
## The following object is masked from 'package:base':
##
## expand.grid
##
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:plyr':
##
## desc
## The following object is masked from 'package:clusterProfiler':
##
## slice
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
##
## select
## The following object is masked from 'package:clusterProfiler':
##
## select
##
## 'select()' returned 1:many mapping between keys and columns
## Warning in bitr(gsym.fc$SYMBOL, fromType = "SYMBOL", toType = "ENTREZID", :
## 21.42% of input gene IDs are fail to map...
#让基因名、ENTREZID、foldchange对应起来
gsym.fc.id <- merge(gsym.fc, gsym.id, by="SYMBOL", all=F)
#按照foldchange排序
gsym.fc.id.sorted <- gsym.fc.id[order(gsym.fc.id$logFC, decreasing = T),]
#获得ENTREZID、foldchange列表,做为GSEA的输入
id.fc <- gsym.fc.id.sorted$logFC
names(id.fc) <- gsym.fc.id.sorted$ENTREZID
查看clusterProfiler的最新用法,请移步Y叔的在线电子书:https://yulab-smu.top/clusterProfiler-book/
# 自定义函数
gsea_func <- function(x, genelist, readable = FALSE) {
GSEA_result <- GSEA(genelist, TERM2GENE = x, eps = 0)
if (readable & nrow(GSEA_result) > 0)
GSEA_result <- setReadable(GSEA_result, 'org.Hs.eg.db', #物种
'ENTREZID') #转回gene symbol
return(GSEA_result)
}
em <- setNames(lapply(gmts, gsea_func, id.fc, readable = TRUE), names(gmts)) %>%
merge_result2(output = "gseaResult")
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.02% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.02% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.02% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.02% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.02% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.02% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.02% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.02% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.02% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## leading edge analysis...
## done...
## preparing geneSet collections...
## GSEA analysis...
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.02% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## no term enriched under specific pvalueCutoff...
# 把富集分析结果输出到文件
write.csv(em@result, "output_GSEA.csv", quote = F)
你可能喜欢打开output_GSEA.csv,总揽各种注释数据库的富集结果,或者画柱状图来看。
画柱状图来总揽各个注释数据库的富集分析结果,更直观。
显著的term非常多,不好全部在图中画出来,可以用showCategory参数调整每种注释画出的term的个数。
默认情况下条形图的颜色代表此term校正后的P值,可以通过修改fill参数来映射其他变量,如“Category”, “p.adjust” 和 “log10_p.adjust”。
参数hjust表示gene number的label距离bar顶端的距离,默认为1。
em_plot(em, showCategory = 5, fill = "log10_p.adjust", hjust = 5)
em_plot(em, showCategory = 5, fill = "Category", hjust = 5) + scale_fill_brewer(palette="Set1")
ggsave("batchGSEA.pdf", width = 12, height = 12)
NES有正负之分,可以把负的画到左侧,正的画到右侧。
em_GSEA1 <- em_GSEA2 <- em
res <- em@result
em_GSEA1@result <- res[which(res$NES > 0), ] # 被激活的通路
em_GSEA2@result <- res[which(res$NES < 0), ] # 被抑制的通路
em_plot(em_GSEA1, em_GSEA2, showCategory = 2, fill = "log10_p.adjust", hjust = 10)
em_plot(em_GSEA1, em_GSEA2, showCategory = 2, fill = "Category", hjust = 12) + scale_fill_brewer(palette="Set1")
ggsave("batchGSEA_sep.pdf", width = 12, height = 12)
接下来,结合你对自己研究领域的认识,提取比较理想的数据集的富集结果,作为文件supplementary file。进而DIY各种富集分析结果图,例如FigureYa11bubble那种泡泡图。
这里提取Catagory列为H的hallmark注释:
# 自定义函数
result_output <- function(em_data, category) {
em_data <- as.data.frame(em_data)
file_out <- em_data[which(em_data$Category == category), ]
}
file_out <- result_output(em, "H") # 提取Catagory列为`H`的结果
write.csv(file_out, "output_GSEA_H.csv", quote = F)
你可能不在乎用哪种注释,只想知道差异表达基因在我感兴趣的生物学过程有没有富集。
我们从所有注释中搜索关键词,例如CHECKPOINT,发现在3个注释中都有cell cycle checkpoints,说明你获取差异表达基因的那种处理条件/基因突变可能影响了细胞周期。
get_term <- function(em_data, term){
ids <- grep(term, as.data.frame(em_data)$ID)
em_data[ids, ]
}
file_out <- get_term(em@result, "CHECKPOINT")
DT::datatable(file_out, escape = F, rownames=F)
其实只需要一个基因名文件。如果你已经拿到基因名easy_input_up.txt、easy_input_down.txt(至少要有一个文件)就可以跳过这步,直接进入“ORA富集分析”。
考虑到差异基因的筛选参数决定了筛出哪些差异表达基因,进而影响ORA的结果。因此,这里从差异表达基因开始,一气呵成,方便调整筛基因的参数。
easy_input_limma.csv,同上,差异表达分析结果。是FigureYa59Plus_GEO2DEG的输出文件,可无缝对接
# 加载差异表达分析结果
gsym.fc <- read.csv("easy_input_limma.csv")
colnames(gsym.fc)[1] <- "SYMBOL"
gsym.fc[1:3,]
## SYMBOL logFC AveExpr t P.Value adj.P.Val B
## 1 KLK10 8.778049 10.50675 111.5755 3.802229e-11 1.760315e-07 15.41764
## 2 FXYD3 7.748971 10.45958 107.2722 4.809044e-11 1.760315e-07 15.29791
## 3 KLK7 9.138924 10.59638 102.6583 6.252983e-11 1.760315e-07 15.15779
# cutoff
logFCcut <- 1 #log2-foldchange
adjPcut <- 0.05 #adj.P.value
# 筛选差异基因
geneup <- gsym.fc[gsym.fc$logFC > logFCcut & gsym.fc$adj.P.Val < adjPcut, ]$SYMBOL
genedown <- gsym.fc[gsym.fc$logFC < -logFCcut & gsym.fc$adj.P.Val < adjPcut, ]$SYMBOL
# 数量
length(geneup)
## [1] 946
length(genedown)
## [1] 986
# 保存到文件,便于套用格式
write.table(geneup, "easy_input_up.txt", quote = F, row.names = F, col.names = F)
write.table(genedown, "easy_input_down.txt", quote = F, row.names = F, col.names = F)
# 自定义函数
enrich_func <- function(x, gene, readable = FALSE) {
en_result <- enricher(gene, TERM2GENE = x)
if (readable & nrow(en_result) > 0)
en_result <- setReadable(en_result, 'org.Hs.eg.db', #物种
'ENTREZID')
return(en_result)
}
# 先以上调表达的基因为例
# 加载基因名
geneup <- read.table("easy_input_up.txt")$V1
# 把gene symbol转换为ENTREZ ID
# 此处物种是人,其他物种的ID转换方法,请参考FigureYa52GOplot
geneup.id <- bitr(geneup, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = "org.Hs.eg.db")
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(geneup, fromType = "SYMBOL", toType = "ENTREZID", OrgDb =
## "org.Hs.eg.db"): 13.07% of input gene IDs are fail to map...
# 富集分析
em_ORAup <- setNames(lapply(gmts, enrich_func, geneup.id$ENTREZID, readable = TRUE), names(gmts)) %>%
merge_result2(output = "enrichResult")
# 输出到文件
write.csv(em_ORAup@result, "output_ORA_up.csv", quote = F)
下调表达的基因也是一样的操作
genedown <- read.table("easy_input_down.txt")$V1
genedown.id <- bitr(genedown, fromType = "SYMBOL", toType = "ENTREZID", OrgDb = "org.Hs.eg.db")
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(genedown, fromType = "SYMBOL", toType = "ENTREZID", OrgDb =
## "org.Hs.eg.db"): 15.4% of input gene IDs are fail to map...
em_ORAdown <- setNames(lapply(gmts, enrich_func, genedown.id$ENTREZID, readable = TRUE), names(gmts)) %>%
merge_result2(output = "enrichResult")
或者你想把上下调的放一起也看看
genesupdown <- union(geneup.id$ENTREZID, genedown.id$ENTREZID)
em_ORA <- setNames(lapply(gmts, enrich_func, genesupdown, readable = TRUE), names(gmts)) %>%
merge_result2(output = "enrichResult")
# 上调表达基因的富集结果
em_plot(em_ORAup, showCategory = 5, fill = "Category", hjust = 10) + scale_fill_brewer(palette="Set1")
# 下调表达基因的富集结果
em_plot(em_ORAdown, showCategory = 5, fill = "Category", hjust = 5) + scale_fill_brewer(palette="Set1")
# 上调的画在右侧,下调的画在左侧
em_plot(em_ORAup, em_ORAdown, showCategory = 5, fill = "Category", hjust = 12) + scale_fill_brewer(palette="Set1")
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
带你顺利衔接enrichplot
# 每种注释筛选3个term
keep1 <- keep_category(em_ORAup, 3)
keep2 <- keep_category(em_ORAdown, 3)
em_ora1 <- em_ORAup[keep1, ]
em_ora2 <- em_ORAdown[keep2, ]
# 上调画右边,下调画左边
em_ora2$Count <- -em_ora2$Count
em_ora <- new("enrichResult",
result = rbind(em_ora1, em_ora2))
# 画barplot
#barplot(em_ora, showCategory = nrow(em_ora)) +
# scale_x_continuous(label = abs) +
# facet_grid(Category ~ ., scales="free",space="free")
# 画dotplot
dotplot(em_ora, showCategory = nrow(em_ora)) +
scale_x_continuous(label = abs) +
facet_grid(Category ~ ., scales="free",space="free")
同上,提取Catagory列为H的hallmark注释:
file_out <- result_output(em_ORA, "H") # 上下调一起
write.csv(file_out, "output_ORA_H.csv", quote = F)
file_out <- result_output(em_ORAup, "H") #上调表达基因
write.csv(file_out, "output_ORAup_H.csv", quote = F)
file_out <- result_output(em_ORAdown, "H") # 下调表达基因
write.csv(file_out, "output_ORAdown_H.csv", quote = F)
跟前面做GSEA输出的output_GSEA_H.csv文件对比看看,大同小异
同上,从所有注释中搜索CHECKPOINT,发现下调表达的基因在多个注释中都有cell cycle checkpoints,说明你获取差异表达基因的那种处理条件/基因突变可能抑制了cell cycle checkpoints。
# 提取带有`CHECKPOINT`的富集结果
file_out <- get_term(em_ORAup@result, "CHECKPOINT")
DT::datatable(file_out, escape = F, rownames=F)
# 上调表达的基因没有富集到带checkpoint字样的term
file_out <- get_term(em_ORAdown@result, "CHECKPOINT")
DT::datatable(file_out, escape = F, rownames=F)
# 下调表达基因有多个富集term里有checkpoint字样
sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] zh_CN.UTF-8/zh_CN.UTF-8/zh_CN.UTF-8/C/zh_CN.UTF-8/zh_CN.UTF-8
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] org.Hs.eg.db_3.11.4 AnnotationDbi_1.50.3 IRanges_2.22.2
## [4] S4Vectors_0.26.1 Biobase_2.48.0 BiocGenerics_0.34.0
## [7] dplyr_1.0.2 plyr_1.8.6 ggplot2_3.3.2
## [10] enrichplot_1.8.1 DOSE_3.14.0 msigdbr_7.2.1
## [13] clusterProfiler_3.16.1
##
## loaded via a namespace (and not attached):
## [1] bit64_4.0.5 RColorBrewer_1.1-2 progress_1.2.2
## [4] httr_1.4.2 tools_4.0.2 DT_0.16
## [7] utf8_1.1.4 R6_2.5.0 DBI_1.1.0
## [10] colorspace_2.0-0 withr_2.3.0 tidyselect_1.1.0
## [13] gridExtra_2.3 prettyunits_1.1.1 bit_4.0.4
## [16] compiler_4.0.2 cli_2.2.0 scatterpie_0.1.5
## [19] xml2_1.3.2 labeling_0.4.2 triebeard_0.3.0
## [22] scales_1.1.1 ggridges_0.5.2 stringr_1.4.0
## [25] digest_0.6.27 rmarkdown_2.5 pkgconfig_2.0.3
## [28] htmltools_0.5.0 htmlwidgets_1.5.2 rlang_0.4.9
## [31] RSQLite_2.2.1 gridGraphics_0.5-0 farver_2.0.3
## [34] generics_0.1.0 jsonlite_1.7.1 crosstalk_1.1.0.1
## [37] BiocParallel_1.22.0 GOSemSim_2.14.2 magrittr_2.0.1
## [40] ggplotify_0.0.5 GO.db_3.11.4 Matrix_1.2-18
## [43] fansi_0.4.1 Rcpp_1.0.5 munsell_0.5.0
## [46] viridis_0.5.1 lifecycle_0.2.0 stringi_1.5.3
## [49] yaml_2.2.1 ggraph_2.0.4 MASS_7.3-53
## [52] qvalue_2.20.0 grid_4.0.2 blob_1.2.1
## [55] ggrepel_0.8.2 DO.db_2.9 crayon_1.3.4
## [58] lattice_0.20-41 graphlayouts_0.7.1 cowplot_1.1.0
## [61] splines_4.0.2 hms_0.5.3 knitr_1.30
## [64] pillar_1.4.7 fgsea_1.14.0 igraph_1.2.6
## [67] reshape2_1.4.4 fastmatch_1.1-0 glue_1.4.2
## [70] evaluate_0.14 downloader_0.4 data.table_1.13.2
## [73] BiocManager_1.30.10 vctrs_0.3.5 tweenr_1.0.1
## [76] urltools_1.7.3 gtable_0.3.0 purrr_0.3.4
## [79] polyclip_1.10-0 tidyr_1.1.2 assertthat_0.2.1
## [82] xfun_0.19 ggforce_0.3.2 europepmc_0.4
## [85] tidygraph_1.2.0 viridisLite_0.3.0 tibble_3.0.4
## [88] rvcheck_0.1.8 memoise_1.1.0 ellipsis_0.3.1